{
 "nbformat": 4,
 "nbformat_minor": 0,
 "metadata": {
  "colab": {
   "provenance": [],
   "authorship_tag": "ABX9TyNe8pcEyzi3pl5M5WFhQh2o"
  },
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "id": "oD1qjs_ntE_p",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1679030666762,
     "user_tz": 240,
     "elapsed": 3,
     "user": {
      "displayName": "Lucian",
      "userId": "17113640720459928730"
     }
    }
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import numpy as np\n",
    "import scipy.io as sio\n",
    "from sklearn.utils import shuffle"
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Official script"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "# Collect only the MATLAB recordings. os.listdir returns every directory entry,\n",
    "# so a stray file (hidden file, checkpoint folder, ...) would otherwise be handed\n",
    "# to sio.loadmat or shift the index-based subject numbering.\n",
    "filenames = [\n",
    "    filename\n",
    "    for filename in os.listdir(\"AFAVA-AD/\")\n",
    "    if filename.lower().endswith(\".mat\")\n",
    "]"
   ],
   "metadata": {
    "id": "HssqLKxCVbeo",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1679030740997,
     "user_tz": 240,
     "elapsed": 660,
     "user": {
      "displayName": "Lucian",
      "userId": "17113640720459928730"
     }
    }
   },
   "execution_count": 33,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "# os.listdir gives no ordering guarantee; sort so the file order is stable\n",
    "filenames = sorted(filenames)"
   ],
   "metadata": {
    "id": "xQJTR4l-WGfC",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1679030742838,
     "user_tz": 240,
     "elapsed": 531,
     "user": {
      "displayName": "Lucian",
      "userId": "17113640720459928730"
     }
    }
   },
   "execution_count": 34,
   "outputs": []
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "outputs": [],
   "source": [
    "feature_path = 'Feature'\n",
    "if not os.path.exists(feature_path):\n",
    "    os.mkdir(feature_path)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Save feature"
   ],
   "metadata": {
    "id": "P7S8U9Vufwvl"
   }
  },
  {
   "cell_type": "code",
   "source": [
    "subseq_length = 256\n",
    "stride = 128  # Half of the subsequence length for half-overlapping\n",
    "epoch_length = 1280  # Samples per epoch (after transposing)\n",
    "n_channels = 16  # Channels per sample\n",
    "# All epochs share the same length, so the window count is loop-invariant\n",
    "num_subsequences = (epoch_length - subseq_length) // stride + 1\n",
    "\n",
    "for i, filename in enumerate(filenames):\n",
    "    patient_id = i + 1  # Subjects are numbered from 1 in sorted file order\n",
    "    mat = sio.loadmat(os.path.join(\"AFAVA-AD\", filename))\n",
    "    # Index 2 of the 'data' struct is read as the sequence of epochs\n",
    "    # NOTE(review): confirm which struct field this index corresponds to\n",
    "    epochs_raw = mat['data'][0, 0][2][0]\n",
    "\n",
    "    # Get epoch number for each subject\n",
    "    epoch_num = len(epochs_raw)\n",
    "    print(\"Epoch number: \",epoch_num)\n",
    "    # Each epoch is stored transposed, giving shape (epoch_length, n_channels)\n",
    "    temp = np.zeros((epoch_num, epoch_length, n_channels))\n",
    "    features = []\n",
    "    for j in range(epoch_num):\n",
    "        temp[j] = np.transpose(epochs_raw[j])\n",
    "\n",
    "        # Cut the epoch into half-overlapping windows. The window index is `w`\n",
    "        # so it cannot be mistaken for the subject index `i` of the outer loop.\n",
    "        subsequences = [\n",
    "            temp[j][w * stride : w * stride + subseq_length, :]\n",
    "            for w in range(num_subsequences)\n",
    "        ]\n",
    "        features.append(np.array(subsequences))\n",
    "    # Collapse (epoch, window) into a single leading axis of segments\n",
    "    features = np.array(features).reshape((-1, subseq_length, n_channels))\n",
    "\n",
    "    feature_name = \"feature_{:02d}.npy\".format(patient_id)\n",
    "    print(f\"Filename: {filename}\")\n",
    "    print(f\"Patient ID: {patient_id}\")\n",
    "    print(\"Raw data:\", temp.shape)\n",
    "    print(\"Segmented data\", features.shape)\n",
    "    np.save(os.path.join(feature_path, feature_name), features)\n",
    "    print(\"Save \" + feature_name)\n",
    "    print()"
   ],
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "-I4cEuRQWKEl",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1679030775070,
     "user_tz": 240,
     "elapsed": 31311,
     "user": {
      "displayName": "Lucian",
      "userId": "17113640720459928730"
     }
    },
    "outputId": "3a3e8a50-a266-4a66-b3c8-09b8b7e30c8e"
   },
   "execution_count": 36,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch number:  35\n",
      "Filename: preproctrials01.mat\n",
      "Patient ID: 1\n",
      "Raw data: (35, 1280, 16)\n",
      "Segmented data (315, 256, 16)\n",
      "Save feature_01.npy\n",
      "\n",
      "Epoch number:  25\n",
      "Filename: preproctrials02.mat\n",
      "Patient ID: 2\n",
      "Raw data: (25, 1280, 16)\n",
      "Segmented data (225, 256, 16)\n",
      "Save feature_02.npy\n",
      "\n",
      "Epoch number:  10\n",
      "Filename: preproctrials03.mat\n",
      "Patient ID: 3\n",
      "Raw data: (10, 1280, 16)\n",
      "Segmented data (90, 256, 16)\n",
      "Save feature_03.npy\n",
      "\n",
      "Epoch number:  33\n",
      "Filename: preproctrials04.mat\n",
      "Patient ID: 4\n",
      "Raw data: (33, 1280, 16)\n",
      "Segmented data (297, 256, 16)\n",
      "Save feature_04.npy\n",
      "\n",
      "Epoch number:  1\n",
      "Filename: preproctrials05.mat\n",
      "Patient ID: 5\n",
      "Raw data: (1, 1280, 16)\n",
      "Segmented data (9, 256, 16)\n",
      "Save feature_05.npy\n",
      "\n",
      "Epoch number:  22\n",
      "Filename: preproctrials06.mat\n",
      "Patient ID: 6\n",
      "Raw data: (22, 1280, 16)\n",
      "Segmented data (198, 256, 16)\n",
      "Save feature_06.npy\n",
      "\n",
      "Epoch number:  3\n",
      "Filename: preproctrials07.mat\n",
      "Patient ID: 7\n",
      "Raw data: (3, 1280, 16)\n",
      "Segmented data (27, 256, 16)\n",
      "Save feature_07.npy\n",
      "\n",
      "Epoch number:  32\n",
      "Filename: preproctrials08.mat\n",
      "Patient ID: 8\n",
      "Raw data: (32, 1280, 16)\n",
      "Segmented data (288, 256, 16)\n",
      "Save feature_08.npy\n",
      "\n",
      "Epoch number:  18\n",
      "Filename: preproctrials09.mat\n",
      "Patient ID: 9\n",
      "Raw data: (18, 1280, 16)\n",
      "Segmented data (162, 256, 16)\n",
      "Save feature_09.npy\n",
      "\n",
      "Epoch number:  38\n",
      "Filename: preproctrials10.mat\n",
      "Patient ID: 10\n",
      "Raw data: (38, 1280, 16)\n",
      "Segmented data (342, 256, 16)\n",
      "Save feature_10.npy\n",
      "\n",
      "Epoch number:  47\n",
      "Filename: preproctrials11.mat\n",
      "Patient ID: 11\n",
      "Raw data: (47, 1280, 16)\n",
      "Segmented data (423, 256, 16)\n",
      "Save feature_11.npy\n",
      "\n",
      "Epoch number:  37\n",
      "Filename: preproctrials12.mat\n",
      "Patient ID: 12\n",
      "Raw data: (37, 1280, 16)\n",
      "Segmented data (333, 256, 16)\n",
      "Save feature_12.npy\n",
      "\n",
      "Epoch number:  29\n",
      "Filename: preproctrials13.mat\n",
      "Patient ID: 13\n",
      "Raw data: (29, 1280, 16)\n",
      "Segmented data (261, 256, 16)\n",
      "Save feature_13.npy\n",
      "\n",
      "Epoch number:  39\n",
      "Filename: preproctrials14.mat\n",
      "Patient ID: 14\n",
      "Raw data: (39, 1280, 16)\n",
      "Segmented data (351, 256, 16)\n",
      "Save feature_14.npy\n",
      "\n",
      "Epoch number:  46\n",
      "Filename: preproctrials15.mat\n",
      "Patient ID: 15\n",
      "Raw data: (46, 1280, 16)\n",
      "Segmented data (414, 256, 16)\n",
      "Save feature_15.npy\n",
      "\n",
      "Epoch number:  28\n",
      "Filename: preproctrials16.mat\n",
      "Patient ID: 16\n",
      "Raw data: (28, 1280, 16)\n",
      "Segmented data (252, 256, 16)\n",
      "Save feature_16.npy\n",
      "\n",
      "Epoch number:  59\n",
      "Filename: preproctrials17.mat\n",
      "Patient ID: 17\n",
      "Raw data: (59, 1280, 16)\n",
      "Segmented data (531, 256, 16)\n",
      "Save feature_17.npy\n",
      "\n",
      "Epoch number:  40\n",
      "Filename: preproctrials18.mat\n",
      "Patient ID: 18\n",
      "Raw data: (40, 1280, 16)\n",
      "Segmented data (360, 256, 16)\n",
      "Save feature_18.npy\n",
      "\n",
      "Epoch number:  46\n",
      "Filename: preproctrials19.mat\n",
      "Patient ID: 19\n",
      "Raw data: (46, 1280, 16)\n",
      "Segmented data (414, 256, 16)\n",
      "Save feature_19.npy\n",
      "\n",
      "Epoch number:  37\n",
      "Filename: preproctrials20.mat\n",
      "Patient ID: 20\n",
      "Raw data: (37, 1280, 16)\n",
      "Segmented data (333, 256, 16)\n",
      "Save feature_20.npy\n",
      "\n",
      "Epoch number:  19\n",
      "Filename: preproctrials21.mat\n",
      "Patient ID: 21\n",
      "Raw data: (19, 1280, 16)\n",
      "Segmented data (171, 256, 16)\n",
      "Save feature_21.npy\n",
      "\n",
      "Epoch number:  17\n",
      "Filename: preproctrials22.mat\n",
      "Patient ID: 22\n",
      "Raw data: (17, 1280, 16)\n",
      "Segmented data (153, 256, 16)\n",
      "Save feature_22.npy\n",
      "\n",
      "Epoch number:  2\n",
      "Filename: preproctrials23.mat\n",
      "Patient ID: 23\n",
      "Raw data: (2, 1280, 16)\n",
      "Segmented data (18, 256, 16)\n",
      "Save feature_23.npy\n",
      "\n"
     ]
    }
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Save label"
   ],
   "metadata": {
    "id": "2xpj3Dy6fzQP"
   }
  },
  {
   "cell_type": "code",
   "source": [
    "AD_positive = [1,3,6,8,9,11,12,13,15,17,19,21]"
   ],
   "metadata": {
    "id": "nNALAnXTf4PP"
   },
   "execution_count": 37,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "labels = np.zeros((23, 2))\n",
    "len(labels)"
   ],
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "029_wjrBf4Sv",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1668385155506,
     "user_tz": 300,
     "elapsed": 163,
     "user": {
      "displayName": "Lucian",
      "userId": "17113640720459928730"
     }
    },
    "outputId": "85ab79bb-07c0-4f31-c71a-2915c2d9c988"
   },
   "execution_count": 38,
   "outputs": [
    {
     "data": {
      "text/plain": "23"
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "outputs": [],
   "source": [
    "label_path = 'Label'\n",
    "if not os.path.exists(label_path):\n",
    "    os.mkdir(label_path)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "# Column 0 is the AD label (0 for healthy; 1 for AD patient).\n",
    "# Column 1 is the subject label (the order of the subject, counted from 1).\n",
    "subject_ids = np.arange(1, len(labels) + 1)\n",
    "labels[:, 1] = subject_ids\n",
    "# The boolean membership mask is stored as 1.0 / 0.0 in the float array\n",
    "labels[:, 0] = np.isin(subject_ids, AD_positive)"
   ],
   "metadata": {
    "id": "0XPwH3RJf4YU"
   },
   "execution_count": 40,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "# Write the label table to disk and confirm on stdout\n",
    "label_file = label_path + \"/label.npy\"\n",
    "np.save(label_file, labels)\n",
    "print(\"Save label\")"
   ],
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "LfEWtteafsYn",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1668385694693,
     "user_tz": 300,
     "elapsed": 162,
     "user": {
      "displayName": "Lucian",
      "userId": "17113640720459928730"
     }
    },
    "outputId": "d6fa158a-b207-4c9b-c052-618a5982d526"
   },
   "execution_count": 41,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Save label\n"
     ]
    }
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Test"
   ],
   "metadata": {
    "id": "8sgVRTVgc-sj"
   }
  },
  {
   "cell_type": "code",
   "source": [
    "# Reload one saved subject via feature_path, so this check reads from the same\n",
    "# directory the features were written to instead of a duplicated literal.\n",
    "test = np.load(os.path.join(feature_path, \"feature_20.npy\"))"
   ],
   "metadata": {
    "id": "-KHmMve7WpK-"
   },
   "execution_count": 42,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "# Display the dimensions of the reloaded feature array\n",
    "np.shape(test)"
   ],
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "5-9QTN7na61H",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1668384485077,
     "user_tz": 300,
     "elapsed": 155,
     "user": {
      "displayName": "Lucian",
      "userId": "17113640720459928730"
     }
    },
    "outputId": "aa2290b1-e4dc-4124-8922-6eadbbded900"
   },
   "execution_count": 43,
   "outputs": [
    {
     "data": {
      "text/plain": "(333, 256, 16)"
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ]
  },
  {
   "cell_type": "code",
   "source": [
    "# Reload the label table via label_path, so this check reads from the same\n",
    "# directory the labels were written to instead of a duplicated literal.\n",
    "test_label = np.load(os.path.join(label_path, \"label.npy\"))\n",
    "test_label"
   ],
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "fALGwF4ndcr0",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1668385699716,
     "user_tz": 300,
     "elapsed": 177,
     "user": {
      "displayName": "Lucian",
      "userId": "17113640720459928730"
     }
    },
    "outputId": "7b5db342-5b77-48ff-bb9f-2298a879cdda"
   },
   "execution_count": 44,
   "outputs": [
    {
     "data": {
      "text/plain": "array([[ 1.,  1.],\n       [ 0.,  2.],\n       [ 1.,  3.],\n       [ 0.,  4.],\n       [ 0.,  5.],\n       [ 1.,  6.],\n       [ 0.,  7.],\n       [ 1.,  8.],\n       [ 1.,  9.],\n       [ 0., 10.],\n       [ 1., 11.],\n       [ 1., 12.],\n       [ 1., 13.],\n       [ 0., 14.],\n       [ 1., 15.],\n       [ 0., 16.],\n       [ 1., 17.],\n       [ 0., 18.],\n       [ 1., 19.],\n       [ 0., 20.],\n       [ 1., 21.],\n       [ 0., 22.],\n       [ 0., 23.]])"
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ]
  },
  {
   "cell_type": "code",
   "source": [],
   "metadata": {
    "id": "P9btuIIviM84"
   },
   "execution_count": 44,
   "outputs": []
  }
 ]
}
